Overview : challenge 1

Challange 1 is about predicting the time courses of some markers in some cell-lines.

Cell-lines show various trends in response to stimuli and certain drug treatments.

We have divided the measured cell-lines to a trainig and a test and a validation set.

In the following section we show some examples of these features from the training set.

Your task will be to predict the interestingly behaving markers’ time courses in the test and validation set.

Input data shaping:

## Warning: package 'pheatmap' was built under R version 3.5.2
## # A tibble: 130,118 x 7
##    cell_line treatment  time time_course cellcount reporter value
##    <fct>     <fct>     <dbl> <fct>           <int> <chr>    <dbl>
##  1 HCC1428   iPI3K         9 A                8831 IdU       2.60
##  2 HCC1428   EGF          23 A                7749 IdU       2.61
##  3 HCC1428   iPI3K        40 A                8010 IdU       2.59
##  4 HCC1428   imTOR        40 A                8465 IdU       2.59
##  5 HCC1428   iEGFR        40 A                8023 IdU       2.45
##  6 HCC1428   iPKC         40 A                6395 IdU       2.51
##  7 HCC1428   iMEK         40 A                7289 IdU       3.27
##  8 HCC1428   imTOR         9 A               10871 IdU       2.57
##  9 HCC1428   iEGFR         9 A                9942 IdU       2.50
## 10 HCC1428   imTOR        13 A                8559 IdU       2.62
## # … with 130,108 more rows

Show Interesting features to support challenge questions

Response to EGF

on absolute scale

# Write a multi-page PDF with one page per reporter: the EGF time course of
# the mean value per cell line and time point, faceted by cell line.
# `phospho_median` is defined outside this block.
# Create the output directory first so pdf() does not fail when it is missing.
dir.create("./figures", showWarnings = FALSE, recursive = TRUE)
pdf("./figures/median_phospho_EGF_response.pdf")
for (marker in unique(phospho_median$reporter)) {
  gg <- phospho_median %>%
    filter(treatment == "EGF", reporter == marker) %>%
    group_by(cell_line, time) %>%
    summarise(value = mean(value)) %>%
    ggplot(aes(time, value)) +
    geom_line() +
    facet_wrap(~cell_line) +
    theme_bw() +
    # "none" replaces the deprecated guides(color = FALSE)
    guides(color = "none") +
    ggtitle(marker)
  # ggplot objects are not auto-printed inside a for loop
  print(gg)
}
dev.off()
## quartz_off_screen 
##                 2

pCREB

# p-CREB: EGF time courses for the listed responding and non-responding
# cell lines, one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
creb_responders <- c("HCC1428", "HCC1569", "HCC1599", "HCC2218", "MDAMB468")
creb_nonresponders <- c("MDAkb2", "MACLS2", "T47D")
phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == "p-CREB",
    cell_line %in% c(creb_responders, creb_nonresponders)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle("p-CREB", subtitle = "responding and non-responding cell-lines")

pSTAT5

# p-STAT5: EGF time courses for the listed responding and non-responding
# cell lines, one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-STAT5"
responders <- c("AU565", "HCC1806", "HCC1187", "MCF12A", "MDAMB468")
nonresponders <- c("BT20", "BT483", "T47D")
phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(responders, nonresponders)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "responding and non-responding cell-lines")

# pSRC I dont see cell lines responding to EGF

p-FAK

mostly non-responsive

# p-FAK: EGF time courses for the listed responding and non-responding
# cell lines, one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-FAK"
responders <- c("DU4475", "MCF12A", "BT20")
nonresponders <- c("UACC893", "KPL1", "HCC38")
phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(responders, nonresponders)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "responding and non-responding cell-lines")

## p-MEK

# p-MEK: EGF time courses for the listed responding and non-responding
# cell lines, one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-MEK"
responders <- c("AU565", "DU4475", "HCC2157", "MDAMB436")
nonresponders <- c("BT483", "KPL1", "MX1", "ZR7530")
phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(responders, nonresponders)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "responding and non-responding cell-lines")

p−S6K

# p-S6K: EGF time courses for the listed responding and non-responding
# cell lines, one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-S6K"
# NOTE(review): "BT483" is listed in both vectors below — confirm which group
# it belongs to. The plot itself is unaffected because the vectors are pooled.
responders <- c("AU565", "DU4475", "BT483", "HCC1187")
nonresponders <- c("BT483", "KPL1", "CAL120", "MFM223", "MPE600")
phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(responders, nonresponders)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "responding and non-responding cell-lines")

p−MAP2K3

pretty noisy and the range of values are narrow

p−STAT1

# p-STAT1: EGF time courses for the listed responding and non-responding
# cell lines, one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-STAT1"
responders <- c("AU565", "HCC1806", "BT20", "MDAMB468")
nonresponders <- c("BT483", "T47D", "HCC1187")
phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(responders, nonresponders)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "responding and non-responding cell-lines")

## p−p53 does not have much dynamics

# p-p53: EGF time courses for the listed responding and non-responding
# cell lines, one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-p53"
responders <- c("184B5", "DU4475", "HCC1143", "Hs578T")
nonresponders <- c("BT483", "T47D", "HCC1187")
phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(responders, nonresponders)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "responding and non-responding cell-lines")

p−NFkB

# p-NFkB: EGF time courses for the listed responding and non-responding
# cell lines, one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-NFkB"
responders <- c(
  "184B5", "AU565", "BT549", "CAL851", "HCC1937", "MCF12A", "HCC1954"
)
nonresponders <- c("BT483", "T47D", "EFM19")
phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(responders, nonresponders)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "responding and non-responding cell-lines")

## p−p38 we nice dynamics !!

# p-p38: EGF time courses for the listed cell lines, one facet per cell line,
# with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-p38"
responders <- c(
  "184B5", "AU565", "BT549", "CAL851", "HCC1937", "MCF12A", "HCC1954",
  "BT483", "T47D"
)
nonresponders <- c("EFM19")
phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(responders, nonresponders)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "plateau and peaks")

## p−AMPK increasing and decreaseing dynamics

# p-AMPK: EGF time courses for cell lines listed as increasing or decreasing,
# one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-AMPK"
# Repeated entries ("HCC1937", "HDQP1") removed; %in% matching is unaffected.
# NOTE(review): the repeats may have been placeholders for other cell lines.
increaseing <- c("184A1", "BT20", "HCC1937")
decreasing <- c("CAL851", "HDQP1", "MCF10F", "ZR751")
phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(increaseing, decreasing)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "up and downs")

p-AKT_S473

increasing and decreaseing dynamics

# p-Akt(Ser473): EGF time courses for cell lines listed as plateau or peak
# shaped, one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-Akt(Ser473)"
plateau <- c("AU565", "HBL100", "HCC1937", "HCC70", "Hs578T")
peaks <- c("CAL851", "MCF12A", "MDAkb2", "UACC893", "SKBR3")
phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(plateau, peaks)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "up and downs")

## p-ERK increasing and decreaseing dynamics

# p-ERK: EGF time courses for cell lines listed as plateau or peak shaped,
# one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-ERK"
plateau <- c("AU565", "MDAMB436", "HCC1395", "CAL51")
peaks <- c("HBL100", "MCF12A", "MDAkb2", "UACC893", "SKBR3")
phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(plateau, peaks)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "up and downs")

CyclinB

no much dynamics

GSK3b

# p-GSK3b: EGF time courses for the listed responding and non-responding
# cell lines, one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-GSK3b"
responders <- c("184B5", "CAL120", "AU565", "MDAMB157", "UACC893")
nonresponders <- c("HBL100", "HDQP1", "MDAkb2", "T47D")
phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(responders, nonresponders)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "responders and non-responsive nodes")

GAPDH

no strong response

p-MKK3-MKK6

# p-MKK3-MKK6: EGF time courses for the listed responding and non-responding
# cell lines, one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-MKK3-MKK6"
responders <- c("184A1", "HBL100", "CAL120", "HCC2185", "MDAMB157", "HCC70")
nonresponders <- c("AU565", "HDQP1", "MDAkb2", "T47D")
phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(responders, nonresponders)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "responders and non-responsive nodes")

### p-PDPK1 effect is not really clear

# p-PDPK1: EGF time courses for the listed responding and non-responding
# cell lines, one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-PDPK1"
responders <- c("184B5", "MX1")
nonresponders <- c("HCC1806", "T47D")
phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(responders, nonresponders)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "responders and non-responsive nodes")

### p-BTK effect is not really clear

# p-BTK: EGF time courses for the listed responding and non-responding
# cell lines, one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-BTK"
# The repeated "HCC2185" entry was removed; %in% matching is unaffected.
responders <- c("HCC2185", "DU4475", "MCF7", "ZR7530", "184A1")
nonresponders <- c("MDAkb2", "T47D")
phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(responders, nonresponders)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "responders and non-responsive nodes")

p-p90RSK

good signals

# p-p90RSK: EGF time courses for cell lines listed as plateau or peak shaped,
# one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-p90RSK"
plateau <- c("AU565", "MDAMB436", "HCC1395", "CAL51")
peaks <- c("HBL100", "MCF12A", "MDAkb2", "UACC893", "SKBR3")
phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(plateau, peaks)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  # NOTE(review): groups are named plateau/peaks but the subtitle speaks of
  # responders — confirm the intended wording.
  ggtitle(pSite, subtitle = "responders and non-responsive nodes")

### p-SMAD23 effect is not really clear

# p-SMAD23: EGF time courses for the listed sample cell lines, one facet per
# cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-SMAD23"
samples <- c("DU4475", "CAL120", "HCC70", "T47D")

phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% samples
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "responders and non-responsive nodes")

p-STAT3

clearly most of the time no response, sometimes good peaks.

# p-STAT3: EGF time courses for the listed responding and non-responding
# cell lines, one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-STAT3"
responders <- c("184A1", "184B5", "BT20", "HCC1806", "MDAMB468")
nonresponders <- c("HCC2218", "MPE600")
phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(responders, nonresponders)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "responders and non-responsive nodes")

p-JNK

effect is not really clear

# p-JNK: EGF time courses for the listed responding and non-responding
# cell lines, one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-JNK"
responders <- c("184A1", "184B5", "BT549", "MCF10A", "SKBR3")
nonresponders <- c("HCC1806", "MDAMB453")

phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(responders, nonresponders)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "responders and non-responsive nodes")

### Ki-67 not much dynamics

p-H3

effect is not really clear

# p-H3: EGF time courses for the listed responding and non-responding
# cell lines, one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-H3"

responders <- c("184A1", "184B5", "HCC1937", "MCF10A", "SKBR3", "UACC893")
nonresponders <- c("BT549", "T47D", "DU4475")

phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(responders, nonresponders)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "responders and non-responsive nodes")

### p-S6 really nice signals

# p-S6: EGF time courses for the listed responding and non-responding
# cell lines, one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-S6"

responders <- c("184A1", "HCC1187", "MCF12A", "MCF10A", "SKBR3", "UACC893")
nonresponders <- c("CAL51", "EFM192A", "MACLS2")

phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(responders, nonresponders)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "responders and non-responsive nodes")

cleavedCas

not clear.

p-MKK4

nice signals ! plateau and peaks

# p-MKK4: EGF time courses for the listed responding and non-responding
# cell lines, one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-MKK4"

responders <- c("AU565", "HCC2218", "CAL120", "HCC1187", "T47D", "MDAMB157")
nonresponders <- c("MCF7", "HCC1500", "HCC70")

phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(responders, nonresponders)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "responders and non-responsive nodes")

p−AKT(Thr308)

many unclear but some are nice .

# p-AKT(Thr308): EGF time courses for the listed responding and non-responding
# cell lines, one facet per cell line, with a loess smooth over the raw line.
# `phospho_median` is defined outside this block.
pSite <- "p-AKT(Thr308)"

responders <- c("MCF12A", "HCC1143", "DU4475", "Hs578T", "T47D")
nonresponders <- c("MCF7", "HCC1500", "HCC70")

phospho_median %>%
  filter(
    treatment == "EGF",
    reporter == pSite,
    cell_line %in% c(responders, nonresponders)
  ) %>%
  ggplot(aes(time, value, group = cell_line)) +
  geom_line() +
  facet_wrap(~cell_line) +
  theme_bw() +
  # "none" replaces the deprecated guides(color = FALSE)
  guides(color = "none") +
  # formula given as a formula object rather than a character string
  geom_smooth(formula = y ~ x, method = "loess") +
  ggtitle(pSite, subtitle = "responders and non-responsive nodes")

4EBP1 and RB

not much dynamics